Missingness plots

First code bashing

# packages
library(ggplot2)
library(here)
## here() starts at C:/Users/Jonathan/Desktop/unimelb/PhD/missNetsSimulations
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# true estimates
resultsFiles = list.files(path = here("Output", "20230726_missNetsTrueModels"))

# initialise list
trueCoefList = list()
trueSeList = list()

# index for how missingness is saved
missSaveLabel = c("Peter", "Todd")
missSaveValue = c("NA", "0")

# TODO:: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1

# TODO:: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 1

# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]

# loop to load model parameters
for(netInd in 1:6){
  
  # load in results for each network
  load(here("Output", "20230726_missNetsTrueModels", resultsFiles[[netInd]]))
  
  # and take the coefficients only
  trueCoefList[[netInd]] = coef(modelres)
  trueSeList[[netInd]] = summary(modelres)$coefficients[,2]
}
## Warning: This object was fit with 'ergm' version 3.11.0 or earlier. Summarizing it with version 3.12 or later may return incorrect results or fail.
## This object was fit with 'ergm' version 3.11.0 or earlier. Summarizing it with version 3.12 or later may return incorrect results or fail.
## This object was fit with 'ergm' version 3.11.0 or earlier. Summarizing it with version 3.12 or later may return incorrect results or fail.
## This object was fit with 'ergm' version 3.11.0 or earlier. Summarizing it with version 3.12 or later may return incorrect results or fail.
## This object was fit with 'ergm' version 3.11.0 or earlier. Summarizing it with version 3.12 or later may return incorrect results or fail.
## Warning: This object was fit with 'ergm' version 4.2.2 or earlier. Summarizing
## it with version 4.3 or later may return incorrect results or fail.
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", chosenMissLabel))

# set up some lists to put in the ergm re-estimations
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()

# hmm... what's the best way to handle this....
# I have specific indices,
# but there are different amounts of each file because re-estimations tend to fail
# especially for some combinations

# some regular expressions
# net1Ind = grep("net1", outputList)
# missMod2Ind = grep("missModel2", outputList)
# 
# # check which indices overlap
# net1MissMod2 = missMod2Ind[missMod2Ind %in% net1Ind]

# set a specific index for the network
chosenNetInd = 2

# file list for the chosen network index
chosenNetOutFiles = grep(paste("20230814_missNetReest_net", chosenNetInd, sep =""), outputList)

##### For the MNAR files (coefSet1), the Miss Model is always 2... so what I can do is:
## regex the MNAR files 
chosenMnarOutFiles = grep(paste("20231011_missErgmNetReest_Miss", chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)

# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)

# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
  stop("Duplicate re-estimation datafiles! BAD.")
}

# loop to load
chosenNetOutList = outputList[combinedNetOutList]

for(fileInd in 1:length(chosenNetOutList)){
  # load all files
  load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
  
  # suppress these version difference warnings. 
  suppressWarnings({
  modelResList[[fileInd]] = modelres$coefficients
  modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
  
  })
  
  
  # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
  
  # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
  if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
   missModelList[[fileInd]] = 4
  } else {
   missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  }

  # propMiss is fine (or should be.)
  
  # regex to get the first number after the specified text
  propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  
}
# plotting
# note: I think I should have a plot for each missingness proportion
chosenProp = 1

# there should only be 3 miss props, 1 for 10%, 3.5 for 35%, and 6 for 60%
# subset the lists to only be a single chosen proportion
chosenPropIndex = propMissList == chosenProp

modelResChosenPropList = modelResList[chosenPropIndex]
modelSeChosenPropList = modelSeList[chosenPropIndex]
missModelChosenPropList = missModelList[chosenPropIndex]

## AltStar
# making the dataset
altStarPlotData = data.frame(altStar = c(unlist(lapply(modelResChosenPropList, `[[`, 2) )), 
                             SE = c(unlist(lapply(modelSeChosenPropList, `[[`, 2))),
                             missModel = factor(c(unlist(missModelChosenPropList)), levels = c(1,2,3,4), labels = c("Indep", "marErgm", "Latent", "mnarErgm")))

# order the plot data
altStarPlotOrd = altStarPlotData[order(altStarPlotData[,"altStar"]),]

# one more ordering with the missing model type
altStarPlotOrd = altStarPlotOrd[order(altStarPlotOrd[,"missModel"]),]



# getting the true altStar value
trueAltStar = trueCoefList[[chosenNetInd]][2]
trueAltStarSE = trueSeList[[chosenNetInd]][2]

# caterpillar?
altStarPlot = ggplot( data = altStarPlotOrd,
                    aes( x = 1:nrow(altStarPlotOrd), y = altStar, col = missModel)) + 
             geom_errorbar(aes (ymin = (altStar - 1.96*SE), ymax = (altStar + 1.96*SE), width = 0.4)) + 
             xlab("") + 
             ylab("AltStar estimate (95% Confidence Int)") + 
             geom_hline(yintercept = trueAltStar, col = "darkblue") + 
             geom_hline(yintercept = 0, col = "black", lty = 2) +
             geom_point() + 
             ggtitle(paste("Diagnostics - Net", chosenNetInd, " - Prop", chosenProp, sep = "")) + 
             theme_classic() + 
             theme(axis.ticks.x = element_blank(),
                   axis.text.x = element_blank()) +
         #          legend.position = "none") +
             geom_rect(aes(xmin = -Inf,
                           xmax = Inf, 
                           ymin = (trueAltStar - 1.96*trueAltStarSE),
                           ymax = (trueAltStar + 1.96*trueAltStarSE)),
                           alpha = 0.01,
                           colour = NA,
                           fill = "grey") +
             ylim(min(altStarPlotData$altStar) - 2*max(abs(altStarPlotData$SE)), max(altStarPlotData$altStar) + 2*max(abs(altStarPlotData$SE)))

altStarPlot

## Gwesp
# making the dataset
gwespPlotData = data.frame(gwesp = c(unlist(lapply(modelResChosenPropList, `[[`, 3) )), 
                             SE = c(unlist(lapply(modelSeChosenPropList, `[[`, 3))),
                             missModel = factor(c(unlist(missModelChosenPropList)), levels = c(1,2,3,4), labels = c("Indep", "marErgm", "Latent", "mnarErgm")))

# order the plot data
gwespPlotOrd = gwespPlotData[order(gwespPlotData[,"gwesp"]),]

# one more ordering with the missing model type
gwespPlotOrd = gwespPlotOrd[order(gwespPlotOrd[,"missModel"]),]


# getting the true gwdeg value
truegwesp = trueCoefList[[chosenNetInd]][3]
truegwespSE = trueSeList[[chosenNetInd]][3]

# caterpillar?
gwespPlot = ggplot( data = gwespPlotOrd,
                    aes( x = 1:nrow(gwespPlotOrd), y = gwesp, col = missModel)) + 
             geom_errorbar(aes (ymin = (gwesp - 1.96*SE), ymax = (gwesp + 1.96*SE), width = 0.4)) + 
             xlab("") + 
             ylab("GWESP estimate (95% Confidence Int)") + 
             labs(col = "missType") + 
             geom_hline(yintercept = truegwesp, col = "darkblue") + 
             geom_hline(yintercept = 0, col = "black", lty = 2) +
             geom_point() + 
             ggtitle(paste("Diagnostics - Net", chosenNetInd, " - Prop", chosenProp, sep = "")) + 
             theme_classic() + 
             theme(axis.ticks.x = element_blank(),
                   axis.text.x = element_blank()) +
           #        legend.position = "none") +
             geom_rect(aes(xmin = -Inf,
                           xmax = Inf, 
                           ymin = (truegwesp - 1.96*truegwespSE),
                           ymax = (truegwesp + 1.96*truegwespSE)),
                           alpha = 0.01,
                           colour = NA,
                           fill = "grey") +
              ylim(min(gwespPlotOrd$gwesp) - 2*max(abs(gwespPlotOrd$SE)), max(gwespPlotOrd$gwesp) + 2*max(abs(gwespPlotOrd$SE)))
    

gwespPlot
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

# code to try computing a fail rate dataset

# fixed value for the amount of estimation attempts
estAttempts = 50

# choose a proportion
chosenProp = 1

# annoyingly how the missingness is saved is missing in this set of results so...
# there should only be one way the missingness is saved, keep it as a string
chosenMiss = chosenMissValue

# subset the lists to only be a single chosen proportion
# get the index
chosenPropIndex = propMissList == chosenProp

# and subset
propMissChosenPropList = propMissList[chosenPropIndex]
missModelChosenPropList = missModelList[chosenPropIndex]

# these lists will always be used so...
# there should only be one missing proportion
# note that there can sometimes be more than one missingness proportion in the list...

# turn the list of missingness proportions into a single numeric value
failMissProp = as.numeric(names(table(unlist(propMissChosenPropList))))

# check to make sure there's only one proportion
if(length(failMissProp) != 1){
  stop("More than one missingness proportion after subsetting")
}

# calculate the success rate
successRate = as.numeric(table(unlist(missModelChosenPropList))/estAttempts)

# and which miss model it is, these can technically range from 1 to 4
failMissModel = as.numeric(names(table(unlist(missModelChosenPropList))))
  
# and put it all together
failData = data.frame(failRate = (1 - successRate), 
                      netInd = rep(chosenNetInd, length(failMissModel)),
                      missModel = failMissModel, 
                      missProp = rep(failMissProp, length(failMissModel)),
                      missSave = rep(chosenMiss, length(failMissModel)))

Turn them into functions

I want these functions to take the entire file list, choose specific networks and missingness proportions. The thing is, I might need a few helper functions beforehand to get the right data….

Multiple smaller functions doing very specific things are preferred over one giant function that does a lot of stuff all at once

### Function to turn the data from the extracted lists into plot-friendly format

# start from the list of coefficients and standard errors
# these can have varying rows per model because not all re-estimations converge


prepMissPlot = function(modelResList, modelSeList, missModelList, propMissList, chosenProp, chosenPara){

  ## prepMissPlot(modelResList, modelSeList, missModelList, propMissList, chosenProp, chosenPara) takes 
  ## four different containing various estimated values and extracted indices to be used 
  ## in making a plot-friendly dataset.
  ##
  ## Input:
  ## - modelResList: A k-long list containing the estimated model parameters for k re-estimated models.
  ##
  ## - modelSeList: A k-long list containing the estimated standard errors for k re-estimated models.
  ##                
  ## - missModelList: A k-long list containing the chosen missingness model index for k re-estimated models.
  ##                  
  ## - propMissList: A k-long list containing the missingness proportion index for k re-estimated models.
  ##
  ## - chosenProp: The specified proportion of missingness to plot. Three possible values here:
  ##               1 = 10%, 3 = 35%, 6 = 60%.
  ##
  ## - chosenPara: The specified parameter to plot. Three possible values here:
  ##               "edges" for edges, "altStar" for alternating stars, "gwesp" for gwesp.
  ##
  ## Output: 
  ## - A k x 3 dataframe containing the parameter values, standard error values, and missing model index.
  ##   The dataframe is ordered by parameter value (smallest to largest) for each missingness model.


  ## Check to make sure all the lists are of the same length
  listLengths = lapply(list(modelResList, modelSeList, missModelList, propMissList), FUN = length)
  
  ## A unit test that spits out an error if there's more than one unique length in the list of lengths
  if( length(unique(listLengths)) != 1 ){
    stop("Lists have differing lengths")
  }
  
  # subset the lists to only be a single chosen proportion
  chosenPropIndex = propMissList == chosenProp
  
  modelResChosenPropList = modelResList[chosenPropIndex]
  modelSeChosenPropList = modelSeList[chosenPropIndex]
  missModelChosenPropList = missModelList[chosenPropIndex]
  
  # Specify indices for the chosen parameter
  # doing it this way because parameter names get very lengthy
  paraIndex = c(1:3)
  modelParaNames = c("edges", "altStar", "gwesp")
  chosenParaIndex = paraIndex[modelParaNames == chosenPara]
  
  
  # making the dataset
  plotData = data.frame(parameter = c(unlist(lapply(modelResChosenPropList, `[[`, chosenParaIndex) )), 
                        SE = c(unlist(lapply(modelSeChosenPropList, `[[`, chosenParaIndex))),
                        missModel = factor(c(unlist(missModelChosenPropList)), levels = c(1,2,3,4), labels = c("Indep", "marErgm", "Latent", "mnarErgm")))
  
  # change to allow 4 = mnarErgm and change missModel 2 = marErgm
  
  # order the plot data
  plotOrd = plotData[order(plotData[,"parameter"]),]
  
  # one more ordering with the missing model type
  plotOrd = plotOrd[order(plotOrd[,"missModel"]),]
  
  # return the ordered plot data
  return(plotOrd)
}

## actual plotting function

missCaterpillarPlot = function(plotData, truePara, trueSe, chosenPara, chosenNetInd, chosenProp, chosenMiss){
  
  ## missCaterpillarPlot(plotData, truePara, trueSe, chosenPara, chosenNetInd, chosenProp) is a very specialised
  ## plotting function that produces a caterpillar plot to compare parameter estimates for very specific data.
  ##
  ## Input:
  ## - plotData: Needs to be a k x 3 data frame for k re-estimated models containing parameter values,
  ##             standard error values, and a missing model index. Ordering is optional, but visually
  ##             useful. Ideally from prepMissPlot().
  ##
  ## - truePara: A single float (can technically be integer, but unlikely). Represents the true model estimate.
  ##                
  ## - trueSe: A single float (can technically be integer, but unlikely). Represents the true standard error.
  ##
  ## - chosenPara: A string with three (but really two) options. The chosen string is the parameter that is 
  ##               plotted. The options, in order, are "edges", "altStar", and "gwesp"
  ##
  ## - chosenNetInd: An integer between 1 to 6 for current purposes. Each integer represents one of the six
  ##                 datasets from UCINet chosen for the current round of re-estimations.
  ##                
  ## - chosenProp: A single integer or float with three options representing the proportion of missingness.
  ##               Options are 1 = 10%, 3.5 = 35%, 6 = 60%.
  ##
  ## - chosenMiss: A value to indicate what missingness is saved as (e.g., NA is Peter, 0 is Todd).
  ##               Input should be strings that are directly used for the label.
  ##                  
  ## Output: 
  ## - The output should be a caterpillar plot containing with k datapoints for k re-estimated models.
  ##   Colours are labelled and represent the missingness model used. True values are represented as a 
  ##   grey rectangle. All error bars or other depictions of spread are 95% confidence intervals. Plot
  ##   title should also be adaptive to the parameters in the function.
  
  # requires ggplot2 to work
  require(ggplot2)


  # caterpillar?
  caterPlot = ggplot( data = plotData,
                      aes( x = 1:nrow(plotData), y = parameter, col = missModel)) + 
               geom_errorbar(aes (ymin = (parameter - 1.96*SE), ymax = (parameter + 1.96*SE), width = 0.4)) + 
               xlab("") + 
               ylab(paste(chosenPara,"estimate (95% Confidence Int)", sep = " ")) + 
               labs(col = "missType") + 
               geom_hline(yintercept = truePara, col = "darkblue") + 
               geom_hline(yintercept = 0, col = "black", lty = 2) +
               geom_point() + 
               ggtitle(paste("Diagnostics - Net", chosenNetInd, " - Prop", chosenProp, " - Miss", chosenMiss ,sep = "")) + 
               theme_classic() + 
               theme(axis.ticks.x = element_blank(),
                     axis.text.x = element_blank()) +
               geom_rect(aes(xmin = -Inf,
                             xmax = Inf, 
                             ymin = (truePara - 1.96*trueSe),
                             ymax = (truePara + 1.96*trueSe)),
                             alpha = 0.01,
                             colour = NA,
                             fill = "grey") +
                ylim(min(min(plotData$parameter) - 2*max(abs(plotData$SE)), (truePara - 1.96*trueSe)), 
                     max(max(plotData$parameter) + 2*max(abs(plotData$SE)), (truePara + 1.96*trueSe)))
      
  # print out the plot
  caterPlot
  
    
}


## function for calculating failure rate (for each datafile read in)
# a function for this because I overwrite which networks I read in for the caterpillar plots
# this function captures every combination of which missingness model, which chosen missingness, which missingness proportion, and which network.

prepFailPlot = function(propMissList, missModelList, chosenNetInd, chosenProp, chosenMiss, estAttempts = 50){
  
  ## prepFailPlot(propMissList, missModelList, chosenNetInd, chosenProp, chosenMiss, estAttempts = 50) calculates
  ## the failure rates of re-estimations of degraded networks with specific object type requirements.
  ##
  ## Input:
  ## - missModelList: A k-long list containing the chosen missingness model index for k re-estimated models.
  ##                  Can technically range between 1 to 4.
  ##                  
  ## - propMissList: A k-long list containing the missingness proportion index for k re-estimated models.
  ##
  ## - chosenNetInd: An integer between 1 to 6 for current purposes. Each integer represents one of the six
  ##                 datasets from UCINet chosen for the current round of re-estimations.
  ##                
  ## - chosenProp: A single integer or float with three options representing the proportion of missingness.
  ##               Options are 1 = 10%, 3 = 35% (... yes, it's annoying.), 6 = 60%.
  ##
  ## - chosenMiss: A string to indicate what missingness is saved as (e.g., "NA" is Peter, "0" is Todd).
  ##                                
  ## Output: 
  ## - The output should be a data frame containing the failure rate for the specified combination of network, 
  ##   proportion of missingness, how the missingness is saved, and all the missingness models in the input list.
    
  
  # subset the lists to only be a single chosen proportion
  # get the index
  chosenPropIndex = propMissList == chosenProp
  
  # and subset
  propMissChosenPropList = propMissList[chosenPropIndex]
  missModelChosenPropList = missModelList[chosenPropIndex]

  # turn the list of missingness proportions into a single numeric value
  failMissProp = as.numeric(names(table(unlist(propMissChosenPropList))))
  
  # check to make sure there's only one proportion
  if(length(failMissProp) != 1){
    stop("More than one missingness proportion after subsetting")
  }
  
  # calculate the success rate
  successRate = as.numeric(table(unlist(missModelChosenPropList))/estAttempts)
  
  # and which miss model it is, these can technically range from 1 to 4
  failMissModel = as.numeric(names(table(unlist(missModelChosenPropList))))
    
  # and put it all together
  failData = data.frame(failRate = (1 - successRate), 
                        netInd = rep(chosenNetInd, length(failMissModel)),
                        missModel = failMissModel, 
                        missProp = rep(failMissProp, length(failMissModel)),
                        missSave = rep(chosenMiss, length(failMissModel)))
  
  
  # return the data frame
  return(failData)

}

## no need for a function to plot fail rates for each combination since I would only want one plot

Function test run

# choose a network
chosenNetInd = 2

# generate relative bias and relative variance (...standard error?) data structures
relBiasData = data.frame(rBias = c(), netInd = c(), missSave = c(), propMiss = c(), missModel = c(), paraName = c())
relSeData = data.frame(rSe = c(),  netInd = c(), missSave = c(), propMiss = c(), missModel = c(), paraName = c())

# select a parameter
modelParaOptions = c("edges", "altStar", "gwesp")

# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
  
  # grab the chosen parameter name
  chosenParaName = modelParaOptions[chosenPara]
  
  # still need to take out true parameter values
  tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
  tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
  
  # including edges
  # test run of the plot function
  testPlotData = prepMissPlot(modelResList = modelResList,
                              modelSeList = modelSeList,
                              missModelList = missModelList,
                              propMissList = propMissList,
                              chosenProp = chunkMissProp,
                              chosenPara = chosenParaName)
  
  # brief inspection just to make sure nothing's wrong
  head(testPlotData) %>% 
    print()
  
  
  # and plot it
  missCaterpillarPlot(plotData = testPlotData,
                      truePara = tempTruePara,
                      trueSe = tempTrueSe,
                      chosenPara = chosenParaName,
                      chosenNetInd = chosenNetInd,
                      chosenProp = chunkMissProp,
                      chosenMiss = chosenMissValue) %>% 
    print()

  # make a temp rbias/rSe data structure to bind with the data frame
  tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
                           netInd = rep(chosenNetInd, nrow(testPlotData)),
                           missSave = rep(chosenMissLabel, nrow(testPlotData)),
                           propMiss = rep(chunkMissProp, nrow(testPlotData)),
                           missModel = testPlotData$missModel,
                           paraName = rep(chosenParaName, nrow(testPlotData)))
  tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
                          netInd = rep(chosenNetInd, nrow(testPlotData)),
                          missSave = rep(chosenMissLabel, nrow(testPlotData)),
                          propMiss = rep(chunkMissProp, nrow(testPlotData)),
                          missModel = testPlotData$missModel,
                          paraName = rep(chosenParaName, nrow(testPlotData)))

  # bind them
  relBiasData = rbind(relBiasData, tempRelBias)
  relSeData = rbind(relSeData, tempRelSe)
  
}
##    parameter       SE missModel
## 8  -3.572767 1.424147     Indep
## 11 -1.985151 2.002649     Indep
## 16 -1.838034 2.499750     Indep
## 4  -1.770333 2.842933     Indep
## 5  -1.659360 2.455153     Indep
## 17 -1.632414 2.516041     Indep

##    parameter        SE missModel
## 7  -2.667740 1.1581335     Indep
## 6  -2.651687 1.1342895     Indep
## 10 -2.519181 0.8830086     Indep
## 13 -2.484591 0.9431191     Indep
## 14 -2.391916 0.9567384     Indep
## 18 -2.339140 0.8401319     Indep

##    parameter        SE missModel
## 15  3.713921 0.4693278     Indep
## 3   3.805494 0.5092882     Indep
## 18  3.847856 0.5059986     Indep
## 12  3.867887 0.5433778     Indep
## 17  3.937885 0.5014629     Indep
## 11  3.945033 0.5001198     Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

# test run for the fail rate data
# a data frame to initialise
failRateData = data.frame()

# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
                            missModelList = missModelList,
                            chosenNetInd = chosenNetInd,
                            chosenProp = chunkMissProp,
                            chosenMiss = chosenMissValue)

# and bind it
failRateData = rbind(failRateData, tempFailData)

Rest of 10% Peter

# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1

# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 1

# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]


# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(4, 5, 6)

# TODO: vector of file names for regex
chunkRdataNames = c("20230814_missNetReest_net",
                    "20230814_missNetReest_net",
                    "20231010_missNetReest_net")

chunkMnarNames = c("20231011_missErgmNetReest_Miss",
                   "20231011_missErgmNetReest_Miss",
                   "20231011_missErgmNetReest_Miss")

# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")

# loop for all chosen networks
for(chunkInd in 1:length(chunkNetworks)){
  
  # make sure the re-estimation lists are empty in the beginning
  modelResList = list()
  modelSeList = list()
  missModelList = list()
  propMissList = list()
  
  # set a specific index for the network
  chosenNetInd = chunkNetworks[chunkInd]
  
  # file list for the chosen network index
  chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
  
  ## regex the MNAR files 
  chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
  
  # combine the two indices
  combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
  
  # check to make sure there aren't any duplicates
  if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
    stop("Duplicate re-estimation datafiles! BAD.")
  }
  
  # loop to load
  chosenNetOutList = outputList[combinedNetOutList]
  
  for(fileInd in 1:length(chosenNetOutList)){
    # load all files
    load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
    
    # suppress these version difference warnings. 
    suppressWarnings({
    modelResList[[fileInd]] = modelres$coefficients
    modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
    
    })
    
    # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
    
    # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
    if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
     missModelList[[fileInd]] = 4
    } else {
     missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    }
      
    # propmiss should be the same
    propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  
  }
  
  
  # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissProp,
                        chosenMiss = chosenMissValue) %>% 
      print()
  
  # make a temp rbias/rSe data structure to bind with the data frame
  tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
                           netInd = rep(chosenNetInd, nrow(testPlotData)),
                           missSave = rep(chosenMissLabel, nrow(testPlotData)),
                           propMiss = rep(chunkMissProp, nrow(testPlotData)),
                           missModel = testPlotData$missModel,
                           paraName = rep(chosenParaName, nrow(testPlotData)))
  tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
                          netInd = rep(chosenNetInd, nrow(testPlotData)),
                          missSave = rep(chosenMissLabel, nrow(testPlotData)),
                          propMiss = rep(chunkMissProp, nrow(testPlotData)),
                          missModel = testPlotData$missModel,
                          paraName = rep(chosenParaName, nrow(testPlotData)))

  # bind them
  relBiasData = rbind(relBiasData, tempRelBias)
  relSeData = rbind(relSeData, tempRelSe)
  
  }
    

  
  # a temporary data frame from the output to be binded to the big data frame
  tempFailData = prepFailPlot(propMissList = propMissList,
                              missModelList = missModelList,
                              chosenNetInd = chosenNetInd,
                              chosenProp = chunkMissProp,
                              chosenMiss = chosenMissValue)

  # and bind it
  failRateData = rbind(failRateData, tempFailData)

}
##     parameter       SE missModel
## 27 -2.0450471 1.355888     Indep
## 17 -1.6887316 1.372465     Indep
## 46 -1.0782637 1.986377     Indep
## 14 -1.0079846 1.794369     Indep
## 30 -0.9901474 1.690993     Indep
## 23 -0.9677308 1.709223     Indep

##    parameter        SE missModel
## 29 -2.380226 0.8948844     Indep
## 34 -2.332890 0.8567828     Indep
## 39 -2.225284 0.6481276     Indep
## 12 -2.203632 0.7067738     Indep
## 25 -2.197335 0.7339564     Indep
## 19 -2.194530 0.6131555     Indep

##    parameter        SE missModel
## 42  2.795684 0.4150812     Indep
## 35  2.841932 0.3840148     Indep
## 5   2.906557 0.3822707     Indep
## 33  2.937962 0.3854581     Indep
## 47  2.939097 0.4309207     Indep
## 11  2.950955 0.3905500     Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

##    parameter        SE missModel
## 13 -5.050360        NA     Indep
## 9  -4.101907 0.4138049     Indep
## 12 -4.095339 0.3877220     Indep
## 5  -4.089140 0.3838097     Indep
## 2  -4.052289 0.4108890     Indep
## 14 -4.042779 0.4425365     Indep

##     parameter        SE missModel
## 14 -0.4271309 0.2344825     Indep
## 1  -0.3689833 0.2283937     Indep
## 9  -0.3686057 0.2258536     Indep
## 2  -0.3471691 0.2368848     Indep
## 17 -0.3043420 0.2638864     Indep
## 16 -0.2999977 0.2291898     Indep

##    parameter        SE missModel
## 13 0.5529485        NA     Indep
## 15 1.3536200 0.3463476     Indep
## 4  1.4155391 0.3970444     Indep
## 10 1.5321071 0.3904505     Indep
## 3  1.5354860 0.3464884     Indep
## 11 1.5899620 0.3939409     Indep

##    parameter       SE missModel
## 33 -6.661065 1.161756     Indep
## 43 -6.520892 1.108532     Indep
## 9  -6.430122 1.040719     Indep
## 42 -6.423533 1.099057     Indep
## 40 -6.348662 1.106593     Indep
## 10 -6.331207 1.156858     Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

##     parameter        SE missModel
## 28 -0.9007415 0.2363367     Indep
## 45 -0.7825025 0.2400551     Indep
## 44 -0.7622703 0.2476933     Indep
## 18 -0.7536412 0.2325391     Indep
## 3  -0.7459583 0.2495798     Indep
## 16 -0.7435994 0.2477373     Indep

##    parameter        SE missModel
## 10  1.053478 0.1945004     Indep
## 29  1.204949 0.1979167     Indep
## 19  1.224828 0.2129165     Indep
## 42  1.239121 0.2127094     Indep
## 33  1.246916 0.2216857     Indep
## 9   1.261516 0.2118016     Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

More missingness conditions

35% Peter

# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Peter"))

# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1

# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 3
chunkMissPropFloat = 3.5 # specifically for 35% missingness because I regex by taking the first number next to 'prop'.

# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]


# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(2, 4, 5, 6)

# TODO: vector of file names for regex
chunkRdataNames = c("20231004_missNetReest_net",
                    "20231004_missNetReest_net",
                    "20231004_missNetReest_net",
                    "20231010_missNetReest_net")

chunkMnarNames = c("20231011_missErgmNetReest_Miss",
                   "20231011_missErgmNetReest_Miss",
                   "20231011_missErgmNetReest_Miss",
                   "20231011_missErgmNetReest_Miss")

# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")

# loop for all specified networks 
for(chunkInd in 1:length(chunkNetworks)){
  
  # make sure the re-estimation lists are empty in the beginning
  modelResList = list()
  modelSeList = list()
  missModelList = list()
  propMissList = list()
  
  # set a specific index for the network
  chosenNetInd = chunkNetworks[chunkInd]
  
  # file list for the chosen network index
  chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
  
  ## regex the MNAR files 
  chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
  
  # combine the two indices
  combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
  
  # check to make sure there aren't any duplicates
  if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
    stop("Duplicate re-estimation datafiles! BAD.")
  }
  
  # loop to load
  chosenNetOutList = outputList[combinedNetOutList]
  
  for(fileInd in 1:length(chosenNetOutList)){
    # load all files
    load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
    
    # suppress these version difference warnings. 
    suppressWarnings({
    modelResList[[fileInd]] = modelres$coefficients
    modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
    
    })
    
    # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
    
    # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
    if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
     missModelList[[fileInd]] = 4
    } else {
     missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    }
      
    # propmiss should be the same
    propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  }
  
    # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissPropFloat,
                        chosenMiss = chosenMissValue) %>% 
      print()
    
  # make a temp rbias/rSe data structure to bind with the data frame
  tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
                           netInd = rep(chosenNetInd, nrow(testPlotData)),
                           missSave = rep(chosenMissLabel, nrow(testPlotData)),
                           propMiss = rep(chunkMissProp, nrow(testPlotData)),
                           missModel = testPlotData$missModel,
                           paraName = rep(chosenParaName, nrow(testPlotData)))
  tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
                          netInd = rep(chosenNetInd, nrow(testPlotData)),
                          missSave = rep(chosenMissLabel, nrow(testPlotData)),
                          propMiss = rep(chunkMissProp, nrow(testPlotData)),
                          missModel = testPlotData$missModel,
                          paraName = rep(chosenParaName, nrow(testPlotData)))

  # bind them
  relBiasData = rbind(relBiasData, tempRelBias)
  relSeData = rbind(relSeData, tempRelSe)
  
  }
  

  
  # a temporary data frame from the output to be binded to the big data frame
  tempFailData = prepFailPlot(propMissList = propMissList,
                              missModelList = missModelList,
                              chosenNetInd = chosenNetInd,
                              chosenProp = chunkMissProp,
                              chosenMiss = chosenMissValue)
  
  # and bind it
  failRateData = rbind(failRateData, tempFailData)

}
##     parameter       SE missModel
## 7  -3.3781085 1.571063     Indep
## 6  -2.8192660 1.761062     Indep
## 5  -1.2349580 3.762309     Indep
## 10 -1.0923401 4.398689     Indep
## 2  -0.9781865 3.853383     Indep
## 8  -0.5903796 2.911597     Indep

##    parameter       SE missModel
## 12 -4.558152 4.043885     Indep
## 14 -3.474548 2.506201     Indep
## 13 -3.227319 2.943347     Indep
## 1  -3.187661 2.242548     Indep
## 16 -2.702853 1.253112     Indep
## 15 -2.632289 1.020986     Indep

##    parameter        SE missModel
## 4   3.323302 0.5682986     Indep
## 1   3.437627 0.6409324     Indep
## 14  3.484401 0.6621262     Indep
## 8   3.566714 0.6259340     Indep
## 3   3.696938 0.6449210     Indep
## 11  3.741881 0.6132945     Indep

##    parameter       SE missModel
## 16 -2.540926 1.468209     Indep
## 33 -2.529681 1.691315     Indep
## 35 -2.427015 1.466072     Indep
## 29 -2.020486 1.593190     Indep
## 26 -1.943041 1.608631     Indep
## 1  -1.765409 1.619538     Indep

##    parameter       SE missModel
## 12 -3.536047 1.825336     Indep
## 23 -3.329675 2.333945     Indep
## 18 -2.818693 1.409608     Indep
## 37 -2.728979 1.230028     Indep
## 34 -2.644825 1.234967     Indep
## 10 -2.596926 1.057250     Indep

##    parameter        SE missModel
## 3   2.497359 0.4892193     Indep
## 7   2.510058 0.4429165     Indep
## 30  2.617554 0.5050321     Indep
## 2   2.640901 0.5463155     Indep
## 27  2.712829 0.4962674     Indep
## 13  2.731805 0.4496286     Indep

##    parameter        SE missModel
## 6  -4.763305 0.5927591     Indep
## 12 -4.423675 0.5544808     Indep
## 1  -4.256021 0.5488157     Indep
## 15 -4.180141 0.5269770     Indep
## 2  -4.162467 0.5045574     Indep
## 5  -4.153906 0.5770239     Indep

##     parameter        SE missModel
## 6  -0.6946950 0.2194906     Indep
## 5  -0.6897158 0.2631167     Indep
## 15 -0.6294828 0.2489500     Indep
## 12 -0.5802322 0.2540797     Indep
## 1  -0.5784914 0.2739039     Indep
## 10 -0.5771192 0.2790234     Indep

##    parameter        SE missModel
## 4   1.137951 0.4996599     Indep
## 8   1.178245 0.4959795     Indep
## 14  1.329230 0.3960599     Indep
## 7   1.429522 0.4758393     Indep
## 13  1.560362 0.4546985     Indep
## 17  1.770557 0.5469616     Indep

##    parameter       SE missModel
## 9  -6.980429 1.139007     Indep
## 11 -6.766356 1.286395     Indep
## 45 -6.555106 1.242611     Indep
## 48 -6.540552 1.100341     Indep
## 25 -6.434793 1.285338     Indep
## 41 -6.358277 1.639629     Indep

##    parameter        SE missModel
## 14 -1.277323 0.3909004     Indep
## 23 -1.185686 0.2943182     Indep
## 32 -1.167117 0.2680503     Indep
## 31 -1.159366 0.3823150     Indep
## 16 -1.136381 0.3262691     Indep
## 7  -1.063438 0.2624494     Indep

##    parameter        SE missModel
## 41 0.8504767 0.2874547     Indep
## 36 0.9067248 0.2625241     Indep
## 50 1.1343697 0.3153175     Indep
## 18 1.2385265 0.2613634     Indep
## 22 1.2590632 0.3627478     Indep
## 27 1.2599829 0.3063573     Indep

60% Peter

# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Peter"))

# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1

# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 6

# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]


# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(2, 4, 5, 6)

# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net",
                    "20231005_missNetReest_net",
                    "20231011_missNetReest_net",
                    "20231011_missNetReest_net")

chunkMnarNames = c("20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss")

# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")

# loop for all specified networks 
for(chunkInd in 1:length(chunkNetworks)){
  
  # make sure the re-estimation lists are empty in the beginning
  modelResList = list()
  modelSeList = list()
  missModelList = list()
  propMissList = list()
  
  # set a specific index for the network
  chosenNetInd = chunkNetworks[chunkInd]
  
  # file list for the chosen network index
  chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
  
  ## regex the MNAR files 
  chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
  
  # combine the two indices
  combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
  
  # check to make sure there aren't any duplicates
  if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
    stop("Duplicate re-estimation datafiles! BAD.")
  }
  
  # loop to load
  chosenNetOutList = outputList[combinedNetOutList]
  
  for(fileInd in 1:length(chosenNetOutList)){
    # load all files
    load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
    
    # suppress these version difference warnings. 
    suppressWarnings({
    modelResList[[fileInd]] = modelres$coefficients
    modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
    
    })
    
    # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
    
    # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
    if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
     missModelList[[fileInd]] = 4
    } else {
     missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    }
      
    # propmiss should be the same
    propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  }
  
  # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissProp,
                        chosenMiss = chosenMissValue) %>% 
      print()
  
  # make a temp rbias/rSe data structure to bind with the data frame
  tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
                           netInd = rep(chosenNetInd, nrow(testPlotData)),
                           missSave = rep(chosenMissLabel, nrow(testPlotData)),
                           propMiss = rep(chunkMissProp, nrow(testPlotData)),
                           missModel = testPlotData$missModel,
                           paraName = rep(chosenParaName, nrow(testPlotData)))
  tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
                          netInd = rep(chosenNetInd, nrow(testPlotData)),
                          missSave = rep(chosenMissLabel, nrow(testPlotData)),
                          propMiss = rep(chunkMissProp, nrow(testPlotData)),
                          missModel = testPlotData$missModel,
                          paraName = rep(chosenParaName, nrow(testPlotData)))

  # bind them
  relBiasData = rbind(relBiasData, tempRelBias)
  relSeData = rbind(relSeData, tempRelSe)
  }
  
  
  
  # a temporary data frame from the output to be binded to the big data frame
  tempFailData = prepFailPlot(propMissList = propMissList,
                              missModelList = missModelList,
                              chosenNetInd = chosenNetInd,
                              chosenProp = chunkMissProp,
                              chosenMiss = chosenMissValue)
  
  # and bind it
  failRateData = rbind(failRateData, tempFailData)

}
##    parameter       SE missModel
## 5  -5.789352 2.062231     Indep
## 20 -4.598751 1.850594     Indep
## 8  -3.903931 2.399643     Indep
## 12 -3.574117 2.122361     Indep
## 9  -2.868342 1.920461     Indep
## 21 -1.942612 1.960238     Indep

##     parameter        SE missModel
## 7  -12.189565 14.285280     Indep
## 10  -8.636441  7.301452     Indep
## 1   -7.742465  8.269611     Indep
## 3   -7.435589  7.489878     Indep
## 14  -5.732763  5.862388     Indep
## 2   -3.727859  4.052053     Indep

##    parameter        SE missModel
## 4   2.565689 0.6949242     Indep
## 21  2.737034 0.7987535     Indep
## 6   2.812536 0.7580205     Indep
## 16  3.077702 0.9303100     Indep
## 18  3.192135 0.7561982     Indep
## 19  3.204291 0.7683349     Indep

##    parameter       SE missModel
## 6  -9.066671       NA     Indep
## 35 -3.983056 1.482096     Indep
## 25 -3.688016 1.425409     Indep
## 10 -2.518948 1.491961     Indep
## 24 -2.343804 2.379993     Indep
## 28 -2.265829 2.084995     Indep

##     parameter       SE missModel
## 13 -10.600876 9.199102     Indep
## 12  -7.608177 5.976598     Indep
## 4   -6.967102 4.550087     Indep
## 8   -5.107308 3.390178     Indep
## 32  -3.863878 3.145042     Indep
## 16  -3.732614 2.720044     Indep

##    parameter        SE missModel
## 6   1.157657        NA     Indep
## 11  1.498300 0.8333988     Indep
## 26  1.608398 0.7615361     Indep
## 17  2.343988 0.8712003     Indep
## 1   2.363417 0.6172139     Indep
## 14  2.388523 0.5638172     Indep

##    parameter        SE missModel
## 18 -7.915567        NA     Indep
## 10 -6.006189        NA     Indep
## 4  -5.514273 1.1385876     Indep
## 15 -5.061362 0.9744425     Indep
## 13 -4.919149 1.1736797     Indep
## 8  -4.631797 0.8727901     Indep

##     parameter        SE missModel
## 4  -1.0541759 0.1900847     Indep
## 13 -0.8025529 0.2567004     Indep
## 19 -0.7912228 0.2738069     Indep
## 15 -0.7703994 0.2574297     Indep
## 2  -0.7660896 0.3085943     Indep
## 11 -0.7373803 0.3518431     Indep

##    parameter        SE missModel
## 10 0.7233999        NA     Indep
## 6  0.8481073 0.6061540     Indep
## 1  1.6349917 0.7578088     Indep
## 12 1.8921170 0.7662248     Indep
## 20 1.9075172 0.8669149     Indep
## 11 1.9556487 0.6164461     Indep

##    parameter       SE missModel
## 4  -8.649119 2.080996     Indep
## 26 -8.272475 2.215487     Indep
## 11 -7.824375 1.694336     Indep
## 47 -7.136581 2.073979     Indep
## 32 -6.928534 1.651045     Indep
## 33 -6.845862 1.545135     Indep

##    parameter        SE missModel
## 44 -2.062308 0.9548628     Indep
## 41 -1.750173 1.2046102     Indep
## 10 -1.622288 0.7202489     Indep
## 14 -1.518258 0.6462700     Indep
## 28 -1.463635 0.9698189     Indep
## 1  -1.456279 0.4954156     Indep

##    parameter        SE missModel
## 47 0.6587452 0.4476198     Indep
## 26 0.6732766 0.4701941     Indep
## 11 0.8695871 0.5348307     Indep
## 4  0.9417435 0.4040563     Indep
## 9  1.0394754 0.4892135     Indep
## 20 1.0694637 0.5600053     Indep

10% Todd

# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Todd"))

# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 2

# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 1

# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]


# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1, 2, 4, 5, 6)

# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net",
                    "20231005_missNetReest_net",
                    "20231005_missNetReest_net",
                    "20231005_missNetReest_net",
                    "20231012_missNetReest_net")

chunkMnarNames = c("20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss")

# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")

# loop for all specified networks 
for(chunkInd in 1:length(chunkNetworks)){
  
  # make sure the re-estimation lists are empty in the beginning
  modelResList = list()
  modelSeList = list()
  missModelList = list()
  propMissList = list()
  
  # set a specific index for the network
  chosenNetInd = chunkNetworks[chunkInd]
  
  # file list for the chosen network index
  chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
  
  ## regex the MNAR files 
  chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
  
  # combine the two indices
  combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
  
  # check to make sure there aren't any duplicates
  if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
    stop("Duplicate re-estimation datafiles! BAD.")
  }
  
  # loop to load
  chosenNetOutList = outputList[combinedNetOutList]
  
  for(fileInd in 1:length(chosenNetOutList)){
    # load all files
    load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
    
    # suppress these version difference warnings. 
    suppressWarnings({
    modelResList[[fileInd]] = modelres$coefficients
    modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
    
    })
    
    # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
    
    # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
    if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
     missModelList[[fileInd]] = 4
    } else {
     missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    }
      
    # propmiss should be the same
    propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  }
  
  # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissProp,
                        chosenMiss = chosenMissValue) %>% 
      print()
    
  # make a temp rbias/rSe data structure to bind with the data frame
  tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
                           netInd = rep(chosenNetInd, nrow(testPlotData)),
                           missSave = rep(chosenMissLabel, nrow(testPlotData)),
                           propMiss = rep(chunkMissProp, nrow(testPlotData)),
                           missModel = testPlotData$missModel,
                           paraName = rep(chosenParaName, nrow(testPlotData)))
  tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
                          netInd = rep(chosenNetInd, nrow(testPlotData)),
                          missSave = rep(chosenMissLabel, nrow(testPlotData)),
                          propMiss = rep(chunkMissProp, nrow(testPlotData)),
                          missModel = testPlotData$missModel,
                          paraName = rep(chosenParaName, nrow(testPlotData)))

  # bind them
  relBiasData = rbind(relBiasData, tempRelBias)
  relSeData = rbind(relSeData, tempRelSe)  
  }
  

  
  
  # a temporary data frame from the output to be binded to the big data frame
  tempFailData = prepFailPlot(propMissList = propMissList,
                              missModelList = missModelList,
                              chosenNetInd = chosenNetInd,
                              chosenProp = chunkMissProp,
                              chosenMiss = chosenMissValue)
  
  # and bind it
  failRateData = rbind(failRateData, tempFailData)

}
##    parameter       SE missModel
## 8  -2.601463 1.256855     Indep
## 24 -2.413046 1.108934     Indep
## 10 -2.395926 1.374799     Indep
## 15 -2.383965 1.253623     Indep
## 9  -2.176266 1.247728     Indep
## 38 -2.170672 1.111399     Indep

##    parameter        SE missModel
## 5  -2.209327 0.5979698     Indep
## 23 -2.189741 0.6877216     Indep
## 30 -2.066400 0.5400037     Indep
## 4  -2.026562 0.4440535     Indep
## 13 -1.986430 0.4411822     Indep
## 39 -1.977470 0.4192232     Indep

##    parameter        SE missModel
## 2   2.409912 0.2867801     Indep
## 25  2.416764 0.2709456     Indep
## 3   2.762928 0.3026043     Indep
## 31  2.874716 0.3174043     Indep
## 26  2.911591 0.3013970     Indep
## 6   2.946111 0.3113748     Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

##     parameter       SE missModel
## 2  -1.6490952 1.703432     Indep
## 9  -1.4773923 1.747493     Indep
## 23 -1.3377862 1.848695     Indep
## 19 -0.9747808 1.645787     Indep
## 12 -0.8558571 2.243823     Indep
## 5  -0.7947030 2.103760     Indep

##    parameter        SE missModel
## 21 -2.568440 1.1667600     Indep
## 14 -2.378623 0.8922353     Indep
## 36 -2.341166 0.9318989     Indep
## 8  -2.336268 1.1878642     Indep
## 46 -2.330456 0.8169489     Indep
## 48 -2.304476 1.1932852     Indep

##    parameter        SE missModel
## 5   1.549235 0.3025049     Indep
## 29  1.710355 0.3314923     Indep
## 24  1.878453 0.3544091     Indep
## 44  1.900765 0.3554659     Indep
## 18  2.099150 0.3389286     Indep
## 39  2.122251 0.3851450     Indep

##     parameter       SE missModel
## 29 -1.6332365 1.153097     Indep
## 48 -1.3210907 1.395869     Indep
## 22 -0.9199276 1.420857     Indep
## 18 -0.7142808 1.432538     Indep
## 12 -0.6703803 1.449203     Indep
## 10 -0.6670701 1.636150     Indep

##    parameter        SE missModel
## 34 -2.003352 0.6859301     Indep
## 26 -1.947103 0.6863618     Indep
## 44 -1.940004 0.6037775     Indep
## 38 -1.911503 0.5101568     Indep
## 30 -1.903417 0.4739233     Indep
## 6  -1.901365 0.7485541     Indep

##    parameter        SE missModel
## 48  1.591746 0.3083361     Indep
## 4   1.612123 0.2829673     Indep
## 18  1.694798 0.2803756     Indep
## 39  1.788530 0.2972528     Indep
## 26  1.793877 0.2787256     Indep
## 6   1.799936 0.2751293     Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

##    parameter        SE missModel
## 6  -4.264797 0.3877630     Indep
## 4  -4.192364 0.3555877     Indep
## 11 -4.155292 0.3629730     Indep
## 15 -4.110912 0.3622067     Indep
## 3  -4.093634 0.3679711     Indep
## 14 -4.090357 0.3716855     Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

##     parameter        SE missModel
## 1  -0.1933919 0.2208256     Indep
## 11 -0.1928590 0.2280006     Indep
## 7  -0.1823017 0.2226638     Indep
## 14 -0.1312494 0.2360561     Indep
## 15 -0.1227517 0.2318603     Indep
## 13 -0.1205444 0.2556446     Indep

##    parameter        SE missModel
## 6   0.895099 0.2581558     Indep
## 9   1.169641 0.3098932     Indep
## 8   1.205350 0.3021720     Indep
## 5   1.214879 0.3145655     Indep
## 10  1.268217 0.3100610     Indep
## 4   1.294713 0.3410631     Indep

##    parameter       SE missModel
## 9  -7.562327 1.097650     Indep
## 5  -7.320652 1.176731     Indep
## 20 -7.263994 1.127838     Indep
## 33 -7.255869 1.161143     Indep
## 2  -7.248503 1.104335     Indep
## 50 -7.185518 1.135164     Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

##     parameter        SE missModel
## 49 -0.5511893 0.2380867     Indep
## 28 -0.5461336 0.2205140     Indep
## 18 -0.5001765 0.2197595     Indep
## 6  -0.4852246 0.2300400     Indep
## 43 -0.4411331 0.2405094     Indep
## 41 -0.4401505 0.2326350     Indep

##    parameter        SE missModel
## 10 0.7147952 0.1443590     Indep
## 20 0.7862908 0.1544625     Indep
## 5  0.8066656 0.1520125     Indep
## 9  0.8667842 0.1670487     Indep
## 40 0.8687591 0.1599717     Indep
## 11 0.8730449 0.1547718     Indep

35% Todd

# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Todd"))

# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 2

# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 3
chunkMissPropFloat = 3.5 # specifically for 35% missingness because I regex by taking the first number next to 'prop'.

# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]


# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1, 2, 4, 5, 6)

# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net",
                    "20231005_missNetReest_net",
                    "20231005_missNetReest_net",
                    "20231012_missNetReest_net",
                    "20231012_missNetReest_net")

chunkMnarNames = c("20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss")

# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")

# loop for all specified networks 
for(chunkInd in 1:length(chunkNetworks)){
  
  # make sure the re-estimation lists are empty in the beginning
  modelResList = list()
  modelSeList = list()
  missModelList = list()
  propMissList = list()
  
  # set a specific index for the network
  chosenNetInd = chunkNetworks[chunkInd]
  
  # file list for the chosen network index
  chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
  
  ## regex the MNAR files 
  chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
  
  # combine the two indices
  combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
  
  # check to make sure there aren't any duplicates
  if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
    stop("Duplicate re-estimation datafiles! BAD.")
  }
  
  # loop to load
  chosenNetOutList = outputList[combinedNetOutList]
  
  for(fileInd in 1:length(chosenNetOutList)){
    # load all files
    load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
    
    # suppress these version difference warnings. 
    suppressWarnings({
    modelResList[[fileInd]] = modelres$coefficients
    modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
    
    })
    
    # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
    
    # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
    if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
     missModelList[[fileInd]] = 4
    } else {
     missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    }
      
    # propmiss should be the same
    propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  }
  
  # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissPropFloat,
                        chosenMiss = chosenMissValue) %>% 
      print()

  # make a temp rbias/rSe data structure to bind with the data frame
  tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
                           netInd = rep(chosenNetInd, nrow(testPlotData)),
                           missSave = rep(chosenMissLabel, nrow(testPlotData)),
                           propMiss = rep(chunkMissProp, nrow(testPlotData)),
                           missModel = testPlotData$missModel,
                           paraName = rep(chosenParaName, nrow(testPlotData)))
  tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
                          netInd = rep(chosenNetInd, nrow(testPlotData)),
                          missSave = rep(chosenMissLabel, nrow(testPlotData)),
                          propMiss = rep(chunkMissProp, nrow(testPlotData)),
                          missModel = testPlotData$missModel,
                          paraName = rep(chosenParaName, nrow(testPlotData)))

  # bind them
  relBiasData = rbind(relBiasData, tempRelBias)
  relSeData = rbind(relSeData, tempRelSe)
  }
  
  

  
  # a temporary data frame from the output to be binded to the big data frame
  tempFailData = prepFailPlot(propMissList = propMissList,
                              missModelList = missModelList,
                              chosenNetInd = chosenNetInd,
                              chosenProp = chunkMissProp,
                              chosenMiss = chosenMissValue)
  
  # and bind it
  failRateData = rbind(failRateData, tempFailData)

}
##    parameter        SE missModel
## 13 -3.337059 0.8859680     Indep
## 2  -2.860776 0.7802048     Indep
## 18 -2.851610 1.0974701     Indep
## 37 -2.726147 0.8829394     Indep
## 9  -2.508242 0.9298530     Indep
## 3  -2.472002 0.8998251     Indep

##    parameter        SE missModel
## 15 -1.436287 0.4408012     Indep
## 44 -1.234459 0.5375122     Indep
## 4  -1.200908 0.4771346     Indep
## 38 -1.151209 0.4432072     Indep
## 12 -1.119609 0.3651799     Indep
## 33 -1.107068 0.5208508     Indep

##    parameter        SE missModel
## 13 0.8143432 0.1433313     Indep
## 22 0.9230283 0.1460986     Indep
## 11 0.9431998 0.1522376     Indep
## 1  0.9683054 0.1539687     Indep
## 30 0.9840596 0.1521053     Indep
## 39 1.0125468 0.1555323     Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

##    parameter       SE missModel
## 18 -3.903673 1.139670     Indep
## 21 -2.816481 1.616045     Indep
## 19 -2.459853 1.432304     Indep
## 47 -2.184766 1.735768     Indep
## 4  -1.885313 1.392126     Indep
## 6  -1.847751 1.171954     Indep

##    parameter        SE missModel
## 50 -2.431769 0.9117054     Indep
## 41 -1.988648 0.9647086     Indep
## 38 -1.952683 0.7673793     Indep
## 25 -1.862815 1.0076413     Indep
## 34 -1.652086 0.6791568     Indep
## 27 -1.521875 0.7766990     Indep

##    parameter        SE missModel
## 18 0.1781799 0.1574842     Indep
## 21 0.3824131 0.1777333     Indep
## 19 0.4221073 0.1832613     Indep
## 9  0.4592685 0.1906219     Indep
## 47 0.4712505 0.1916285     Indep
## 5  0.5133984 0.1805051     Indep

##    parameter        SE missModel
## 6  -2.659930 0.8906202     Indep
## 43 -2.484435 0.9949309     Indep
## 42 -2.379924 1.0514690     Indep
## 2  -2.249203 1.0359835     Indep
## 19 -2.083134 1.2432392     Indep
## 35 -1.883939 1.1410617     Indep

##    parameter        SE missModel
## 29 -1.382605 0.6180631     Indep
## 8  -1.350535 0.4879766     Indep
## 24 -1.324427 0.4870290     Indep
## 13 -1.320548 0.5947146     Indep
## 20 -1.271720 0.5316526     Indep
## 11 -1.204599 0.5222904     Indep

##    parameter        SE missModel
## 1  0.4634877 0.1972899     Indep
## 38 0.5640863 0.1936564     Indep
## 19 0.5753617 0.2009085     Indep
## 31 0.5896167 0.2008107     Indep
## 33 0.6195691 0.1980860     Indep
## 50 0.6667230 0.2139163     Indep

##   parameter        SE missModel
## 2 -4.417349 0.4255926     Indep
## 1 -4.366253 0.3996069     Indep
## 3 -4.016838 0.3891978     Indep
## 6 -4.546791 0.4310100   marErgm
## 5 -4.453206 0.4022960   marErgm
## 7 -4.263979 0.3547582   marErgm
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

##     parameter        SE missModel
## 3 -0.02212587 0.2536399     Indep
## 1  0.30771625 0.2589552     Indep
## 2  0.39936734 0.2535769     Indep
## 4 -0.14067356 0.2359803   marErgm
## 7 -0.04922777 0.2510923   marErgm
## 5  0.43782339 0.2521844   marErgm

##   parameter        SE missModel
## 2 0.6856637 0.2854993     Indep
## 1 0.8319856 0.3128379     Indep
## 3 1.2445683 0.3393180     Indep
## 6 0.4876890 0.2788660   marErgm
## 5 0.6333639 0.2932883   marErgm
## 7 1.4323621 0.3900154   marErgm

##    parameter       SE missModel
## 9  -9.680129 1.321978     Indep
## 24 -9.409439 1.341914     Indep
## 19 -9.138619 1.273927     Indep
## 22 -8.715590 1.274077     Indep
## 5  -8.703109 1.269927     Indep
## 14 -8.598590 1.232700     Indep

##     parameter        SE missModel
## 34 -0.4271915 0.2338981     Indep
## 35 -0.2108234 0.2405158     Indep
## 29 -0.1825741 0.2283651     Indep
## 39 -0.1809863 0.2414358     Indep
## 12 -0.1535835 0.2152647     Indep
## 49 -0.0967697 0.2282145     Indep

##    parameter        SE missModel
## 24 0.2549825 0.1382805     Indep
## 43 0.2645356 0.1409349     Indep
## 3  0.3558289 0.1495326     Indep
## 10 0.3681067 0.1433609     Indep
## 41 0.3699661 0.1499506     Indep
## 19 0.3730467 0.1402606     Indep

60% Todd

# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Todd"))

# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 2

# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 6

# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]


# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1, 2, 4, 5, 6)

# TODO: vector of file names for regex
chunkRdataNames = c("20231006_missNetReest_net",
                    "20231006_missNetReest_net",
                    "20231006_missNetReest_net",
                    "20231011_missNetReest_net",
                    "20231011_missNetReest_net")

chunkMnarNames = c("20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss",
                   "20231012_missErgmNetReest_Miss")

# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")

# loop for all specified networks 
for(chunkInd in 1:length(chunkNetworks)){
  
  # make sure the re-estimation lists are empty in the beginning
  modelResList = list()
  modelSeList = list()
  missModelList = list()
  propMissList = list()
  
  # set a specific index for the network
  chosenNetInd = chunkNetworks[chunkInd]
  
  # file list for the chosen network index
  chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
  
  ## regex the MNAR files 
  chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
  
  # combine the two indices
  combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
  
  # check to make sure there aren't any duplicates
  if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
    stop("Duplicate re-estimation datafiles! BAD.")
  }
  
  # loop to load
  chosenNetOutList = outputList[combinedNetOutList]
  
  for(fileInd in 1:length(chosenNetOutList)){
    # load all files
    load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
    
    # suppress these version difference warnings. 
    suppressWarnings({
    modelResList[[fileInd]] = modelres$coefficients
    modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
    
    })
    
    # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
    
    # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
    if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
     missModelList[[fileInd]] = 4
    } else {
     missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    }
      
    # propmiss should be the same
  propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}

  # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissProp,
                        chosenMiss = chosenMissValue) %>% 
      print()
    
  # make a temp rbias/rSe data structure to bind with the data frame
  tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
                           netInd = rep(chosenNetInd, nrow(testPlotData)),
                           missSave = rep(chosenMissLabel, nrow(testPlotData)),
                           propMiss = rep(chunkMissProp, nrow(testPlotData)),
                           missModel = testPlotData$missModel,
                           paraName = rep(chosenParaName, nrow(testPlotData)))
  tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
                          netInd = rep(chosenNetInd, nrow(testPlotData)),
                          missSave = rep(chosenMissLabel, nrow(testPlotData)),
                          propMiss = rep(chunkMissProp, nrow(testPlotData)),
                          missModel = testPlotData$missModel,
                          paraName = rep(chosenParaName, nrow(testPlotData)))

  # bind them
  relBiasData = rbind(relBiasData, tempRelBias)
  relSeData = rbind(relSeData, tempRelSe)  
  }

  

  
  
  # a temporary data frame from the output to be binded to the big data frame
  tempFailData = prepFailPlot(propMissList = propMissList,
                              missModelList = missModelList,
                              chosenNetInd = chosenNetInd,
                              chosenProp = chunkMissProp,
                              chosenMiss = chosenMissValue)
  
  # and bind it
  failRateData = rbind(failRateData, tempFailData)

}
##    parameter        SE missModel
## 31 -4.460967 0.5810290     Indep
## 17 -4.391896 0.6414831     Indep
## 21 -4.251427 0.7467956     Indep
## 34 -4.235010 0.7519899     Indep
## 18 -3.938098 0.7389818     Indep
## 1  -3.887855 0.7437958     Indep

##      parameter        SE missModel
## 20 -0.54170566 0.3171848     Indep
## 24 -0.31403746 0.3400528     Indep
## 7  -0.24488276 0.3682580     Indep
## 19 -0.22044346 0.2847240     Indep
## 3  -0.07633112 0.2590887     Indep
## 29 -0.05303975 0.2633457     Indep

##    parameter        SE missModel
## 15 0.1147738 0.1344550     Indep
## 2  0.2438565 0.1288911     Indep
## 17 0.2489000 0.1466968     Indep
## 12 0.2720479 0.1266465     Indep
## 28 0.2784234 0.1299738     Indep
## 23 0.2830477 0.1217562     Indep

##    parameter        SE missModel
## 34 -4.117064 0.7813483     Indep
## 8  -3.866036 0.8600851     Indep
## 22 -3.575605 0.8914025     Indep
## 2  -3.486708 1.0883342     Indep
## 41 -3.341053 1.1678912     Indep
## 35 -3.238912 0.8570279     Indep

##     parameter        SE missModel
## 11 -1.6951627 0.7913244     Indep
## 45 -1.0758965 0.4881540     Indep
## 16 -0.6649367 0.4877096     Indep
## 24 -0.5145593 0.5383474     Indep
## 5  -0.5060344 0.3733735     Indep
## 26 -0.3739797 0.5027186     Indep

##     parameter        SE missModel
## 28 0.02784400 0.1756171     Indep
## 42 0.03321241 0.2095743     Indep
## 2  0.07508869 0.1726614     Indep
## 25 0.11500553 0.1873731     Indep
## 34 0.12822292 0.1818427     Indep
## 1  0.13671639 0.1920777     Indep

##    parameter        SE missModel
## 21 -4.186214 0.8410843     Indep
## 15 -3.781792 0.7574983     Indep
## 39 -3.415426 0.7451268     Indep
## 30 -3.321968 0.8540362     Indep
## 40 -3.248794 0.9085335     Indep
## 14 -3.190952 0.7852334     Indep

##     parameter        SE missModel
## 31 -0.9245958 0.4509537     Indep
## 19 -0.7191358 0.4810619     Indep
## 17 -0.4522326 0.4244435     Indep
## 36 -0.4507429 0.4204314     Indep
## 9  -0.4244602 0.3580949     Indep
## 8  -0.3759830 0.3769310     Indep

##      parameter        SE missModel
## 21 -0.32176800 0.2684163     Indep
## 15  0.01944446 0.2657635     Indep
## 42  0.07392221 0.2709689     Indep
## 6   0.13021100 0.2911042     Indep
## 41  0.19774587 0.2389817     Indep
## 40  0.21448691 0.2095101     Indep

##       parameter         SE missModel
## 1     -4.749368  0.4281441     Indep
## 3     -4.298512  0.5375795     Indep
## 2     -4.171256  0.5552278     Indep
## 8 -17812.803613 10.6302992   marErgm
## 5     -4.781183  0.5044661   marErgm
## 4     -4.535313  0.5246310   marErgm

##   parameter        SE missModel
## 2 0.1902914 0.3616075     Indep
## 3 0.4397348 0.2924576     Indep
## 1 0.6479313 0.2579289     Indep
## 7 0.1221795 0.3665846   marErgm
## 6 0.3604831 0.3471392   marErgm
## 4 0.6110184 0.2724064   marErgm

##     parameter            SE missModel
## 1   0.3854443  3.015044e-01     Indep
## 3   0.4696144  3.230628e-01     Indep
## 2   0.8850460  4.268974e-01     Indep
## 8 -22.0702887 2.680862e+115   marErgm
## 5   0.1396263  3.000309e-01   marErgm
## 4   0.2854597  2.927772e-01   marErgm

##    parameter       SE missModel
## 4  -11.67967 1.761618     Indep
## 49 -11.60233 1.668491     Indep
## 19 -10.86087 1.509740     Indep
## 33 -10.81512 1.729544     Indep
## 34 -10.27515 1.710123     Indep
## 26 -10.15169 1.589956     Indep

##      parameter        SE missModel
## 41 -0.29712466 0.2683588     Indep
## 33 -0.20487467 0.2645884     Indep
## 24 -0.18751920 0.2627259     Indep
## 12 -0.14089183 0.2527441     Indep
## 2   0.01010340 0.2545368     Indep
## 5   0.02821066 0.2418109     Indep

##       parameter        SE missModel
## 50 -0.164454094 0.2209245     Indep
## 25 -0.131737953 0.1781773     Indep
## 18 -0.093227217 0.2367062     Indep
## 29 -0.082722105 0.2330193     Indep
## 42 -0.040174880 0.2351467     Indep
## 31  0.006732098 0.1901246     Indep

Sparse plots

The plots below are specifically plots with very sparse amounts of successful re-estimations.

10% Peter

# 10% missingness - Peter
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Peter"))

# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1

# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 1

# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]


# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1)

# note: 
# net 3 is so messed up it has ONE converged estimation for this set 
# 20231010_missNetReest_net3_missModel1_prop1_trial9.RData

# handle fail rates separately. these data structures are very unstandardised.

# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net")

chunkMnarNames = c("20231011_missErgmNetReest_Miss")

# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")

# loop for all specified networks 
for(chunkInd in 1:length(chunkNetworks)){
  
  # make sure the re-estimation lists are empty in the beginning
  modelResList = list()
  modelSeList = list()
  missModelList = list()
  propMissList = list()
  
  # set a specific index for the network
  chosenNetInd = chunkNetworks[chunkInd]
  
  # file list for the chosen network index
  chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
  
  ## regex the MNAR files 
  chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
  
  # combine the two indices
  combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
  
  # check to make sure there aren't any duplicates
  if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
    stop("Duplicate re-estimation datafiles! BAD.")
  }
  
  # loop to load
  chosenNetOutList = outputList[combinedNetOutList]
  
  for(fileInd in 1:length(chosenNetOutList)){
    # load all files
    load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
    
    # suppress these version difference warnings. 
    suppressWarnings({
    modelResList[[fileInd]] = modelres$coefficients
    modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
    
    })
    
    # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
    
    # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
    if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
     missModelList[[fileInd]] = 4
    } else {
     missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    }
      
    # propmiss should be the same
    propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  }
  
  # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissProp,
                        chosenMiss = chosenMissValue) %>% 
      print()
  
  }
   
  # a temporary data frame from the output to be binded to the big data frame
  tempFailData = prepFailPlot(propMissList = propMissList,
                              missModelList = missModelList,
                              chosenNetInd = chosenNetInd,
                              chosenProp = chunkMissProp,
                              chosenMiss = chosenMissValue)

  # and bind it
  failRateData = rbind(failRateData, tempFailData)

}
##     parameter       SE missModel
## 1  0.06730704 2.831189  mnarErgm
## 6  0.41110006 2.700997  mnarErgm
## 10 0.44803684 3.233691  mnarErgm
## 7  0.62303532 3.283356  mnarErgm
## 8  1.28406049 3.326750  mnarErgm
## 9  1.40853051 3.443864  mnarErgm

##    parameter        SE missModel
## 5  -3.012581 1.1944335  mnarErgm
## 3  -2.811731 1.0764746  mnarErgm
## 11 -2.705572 0.9157626  mnarErgm
## 2  -2.677974 0.9916606  mnarErgm
## 4  -2.656809 0.9220835  mnarErgm
## 9  -2.590910 0.8378803  mnarErgm

##   parameter        SE missModel
## 5  3.275134 0.3421808  mnarErgm
## 9  3.344275 0.3413299  mnarErgm
## 2  3.398827 0.3541200  mnarErgm
## 8  3.406120 0.3646437  mnarErgm
## 3  3.407984 0.3722419  mnarErgm
## 4  3.423570 0.3405085  mnarErgm
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

# the 'converged' model for net 3
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()

# set a specific index for the network
chosenNetInd = 3

# file list for the chosen network index
chosenNetOutFiles = grep(paste("20231010_missNetReest_net", chosenNetInd, sep =""), outputList)


# loop to load
chosenNetOutList = outputList[chosenNetOutFiles]

for(fileInd in 1:length(chosenNetOutList)){
  # load all files
  load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
  
  # suppress these version difference warnings. 
  suppressWarnings({
  modelResList[[fileInd]] = modelres$coefficients
  modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
  
  })
  
   missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    
  # propmiss should be the same
  propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}

  # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissProp,
                        chosenMiss = chosenMissValue) %>% 
      print()
  
  }
##   parameter SE missModel
## 1 -89.26956 NA     Indep

##   parameter SE missModel
## 1   21.2504 NA     Indep

##   parameter SE missModel
## 1  2.785266 NA     Indep

# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
                            missModelList = missModelList,
                            chosenNetInd = chosenNetInd,
                            chosenProp = chunkMissProp,
                            chosenMiss = chosenMissValue)

# and bind it
failRateData = rbind(failRateData, tempFailData)

35% Peter

# 35% missingness
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Peter"))

# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1

# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 3
chunkMissPropFloat = 3.5 # specifically for 35% missingness because I regex by taking the first number next to 'prop'.

# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]


# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1)

# note: 
# net 3 is still so messed up it has three converged estimation for this set (out of 200)
# 20231010_missNetReest_net3_missModel2_prop3.5_trial11.RData
# 20231010_missNetReest_net3_missModel3_prop3.5_trial12.RData
# 20231010_missNetReest_net3_missModel3_prop3.5_trial19.RData

# no mnars, so the loop won't work.

# handle fail rates separately. these data structures are very unstandardised.

# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net")

chunkMnarNames = c("20231011_missErgmNetReest_Miss")

# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")

# loop for all specified networks 
for(chunkInd in 1:length(chunkNetworks)){
  
  # make sure the re-estimation lists are empty in the beginning
  modelResList = list()
  modelSeList = list()
  missModelList = list()
  propMissList = list()
  
  # set a specific index for the network
  chosenNetInd = chunkNetworks[chunkInd]
  
  # file list for the chosen network index
  chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
  
  ## regex the MNAR files 
  chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
  
  # combine the two indices
  combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
  
  # check to make sure there aren't any duplicates
  if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
    stop("Duplicate re-estimation datafiles! BAD.")
  }
  
  # loop to load
  chosenNetOutList = outputList[combinedNetOutList]
  
  for(fileInd in 1:length(chosenNetOutList)){
    # load all files
    load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
    
    # suppress these version difference warnings. 
    suppressWarnings({
    modelResList[[fileInd]] = modelres$coefficients
    modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
    
    })
    
    # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
    
    # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
    if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
     missModelList[[fileInd]] = 4
    } else {
     missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    }
      
    # propmiss should be the same
    propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  }
  
  # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissPropFloat,
                        chosenMiss = chosenMissValue) %>% 
      print()
  
  }
   
  # a temporary data frame from the output to be binded to the big data frame
  tempFailData = prepFailPlot(propMissList = propMissList,
                              missModelList = missModelList,
                              chosenNetInd = chosenNetInd,
                              chosenProp = chunkMissProp,
                              chosenMiss = chosenMissValue)

  # and bind it
  failRateData = rbind(failRateData, tempFailData)

}
##    parameter       SE missModel
## 3   1.354497 2.909963  mnarErgm
## 18  2.906806 2.973310  mnarErgm
## 13  2.981297 3.469670  mnarErgm
## 9   3.067972 3.937823  mnarErgm
## 2   3.332524 3.476768  mnarErgm
## 4   3.417883 3.512137  mnarErgm

##    parameter       SE missModel
## 10 -4.981270 1.855869  mnarErgm
## 20 -4.887841 1.760132  mnarErgm
## 6  -4.644249 1.794332  mnarErgm
## 8  -4.517936 1.729852  mnarErgm
## 16 -4.301038 1.535839  mnarErgm
## 11 -4.210703 1.294873  mnarErgm

##    parameter        SE missModel
## 12  1.962097 0.2659402  mnarErgm
## 16  2.015492 0.2764007  mnarErgm
## 11  2.038997 0.2944418  mnarErgm
## 19  2.079809 0.3464684  mnarErgm
## 7   2.083504 0.3216842  mnarErgm
## 6   2.096097 0.2653546  mnarErgm
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

### A section specifically for the three re-estimations of net3 that made it
# no MNAR files, so removing them from the code below

# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()

# set a specific index for the network
chosenNetInd = 3

# file list for the chosen network index
chosenNetOutFiles = grep(paste("20231010_missNetReest_net", chosenNetInd, sep =""), outputList)


# loop to load
chosenNetOutList = outputList[chosenNetOutFiles]

for(fileInd in 1:length(chosenNetOutList)){
  # load all files
  load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
  
  # suppress these version difference warnings. 
  suppressWarnings({
  modelResList[[fileInd]] = modelres$coefficients
  modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
  
  })
  
   missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    
  # propmiss should be the same
  propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}

# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
  
  # grab the chosen parameter name
  chosenParaName = modelParaOptions[chosenPara]
  
  # still need to take out true parameter values
  tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
  tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
  
  # including edges
  # test run of the plot function
  testPlotData = prepMissPlot(modelResList = modelResList,
                              modelSeList = modelSeList,
                              missModelList = missModelList,
                              propMissList = propMissList,
                              chosenProp = chunkMissProp,
                              chosenPara = chosenParaName)
  
  # brief inspection just to make sure nothing's wrong
  head(testPlotData) %>% 
    print()
  
  
  # and plot it
  missCaterpillarPlot(plotData = testPlotData,
                      truePara = tempTruePara,
                      trueSe = tempTrueSe,
                      chosenPara = chosenParaName,
                      chosenNetInd = chosenNetInd,
                      chosenProp = chunkMissPropFloat,
                      chosenMiss = chosenMissValue) %>% 
    print()

}
##   parameter SE missModel
## 1 -59.62898 NA   marErgm
## 2 -31.73605 NA    Latent
## 3 -17.05925 NA    Latent

##   parameter SE missModel
## 1 15.661456 NA   marErgm
## 2  1.350935 NA    Latent
## 3  5.424564 NA    Latent

##    parameter SE missModel
## 1 -0.2770482 NA   marErgm
## 3 -1.1609499 NA    Latent
## 2 12.4001035 NA    Latent

# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
                            missModelList = missModelList,
                            chosenNetInd = chosenNetInd,
                            chosenProp = chunkMissProp,
                            chosenMiss = chosenMissValue)

# and bind it
failRateData = rbind(failRateData, tempFailData)

60% Peter

# 60% Peter
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Peter"))

# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1

# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 6

# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]


# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1)

# note: 
# net 3 is still so messed up it has two converged estimation for this set (out of 200)
# 20231011_missNetReest_net3_missModel2_prop6_trial40
# 20231011_missNetReest_net3_missModel3_prop6_trial46

# no mnars, so the loop won't work.

# handle fail rates separately. these data structures are very unstandardised.

# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net")

chunkMnarNames = c("20231012_missErgmNetReest_Miss")

# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")

# loop for all specified networks 
for(chunkInd in 1:length(chunkNetworks)){
  
  # make sure the re-estimation lists are empty in the beginning
  modelResList = list()
  modelSeList = list()
  missModelList = list()
  propMissList = list()
  
  # set a specific index for the network
  chosenNetInd = chunkNetworks[chunkInd]
  
  # file list for the chosen network index
  chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
  
  ## regex the MNAR files 
  chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
  
  # combine the two indices
  combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
  
  # check to make sure there aren't any duplicates
  if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
    stop("Duplicate re-estimation datafiles! BAD.")
  }
  
  # loop to load
  chosenNetOutList = outputList[combinedNetOutList]
  
  for(fileInd in 1:length(chosenNetOutList)){
    # load all files
    load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
    
    # suppress these version difference warnings. 
    suppressWarnings({
    modelResList[[fileInd]] = modelres$coefficients
    modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
    
    })
    
    # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
    
    # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
    if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
     missModelList[[fileInd]] = 4
    } else {
     missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    }
      
    # propmiss should be the same
    propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  }
  
  # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissProp,
                        chosenMiss = chosenMissValue) %>% 
      print()
  
  }
   
  # a temporary data frame from the output to be binded to the big data frame
  tempFailData = prepFailPlot(propMissList = propMissList,
                              missModelList = missModelList,
                              chosenNetInd = chosenNetInd,
                              chosenProp = chunkMissProp,
                              chosenMiss = chosenMissValue)

  # and bind it
  failRateData = rbind(failRateData, tempFailData)

}
##     parameter       SE missModel
## 1  -19.471644       NA     Indep
## 2   -5.164890 1.341253     Indep
## 3  -12.764592       NA   marErgm
## 12  -4.443987 1.069731  mnarErgm
## 8   -4.212305 1.364534  mnarErgm
## 26  -4.030651 1.201752  mnarErgm

##     parameter         SE missModel
## 2   -1.676416  0.2491544     Indep
## 1    3.753020         NA     Indep
## 3    2.523763         NA   marErgm
## 24 -16.309377 12.3213518  mnarErgm
## 31  -7.352189  4.5866855  mnarErgm
## 13  -6.388176  3.7709916  mnarErgm

##    parameter        SE missModel
## 1   2.895285        NA     Indep
## 2   4.855019 0.6582781     Indep
## 3   1.836877        NA   marErgm
## 20  1.610596 0.6224760  mnarErgm
## 30  1.847450 0.3734943  mnarErgm
## 24  1.860921 0.2396976  mnarErgm

### A section specifically for the three re-estimations of net3 that made it
# no MNAR files, so removing them from the code below

# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()

# set a specific index for the network
chosenNetInd = 3

# file list for the chosen network index
chosenNetOutFiles = grep(paste("20231011_missNetReest_net", chosenNetInd, sep =""), outputList)


# loop to load
chosenNetOutList = outputList[chosenNetOutFiles]

for(fileInd in 1:length(chosenNetOutList)){
  # load all files
  load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
  
  # suppress these version difference warnings. 
  suppressWarnings({
  modelResList[[fileInd]] = modelres$coefficients
  modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
  
  })
  
   missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    
  # propmiss should be the same
  propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}

# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
  
  # grab the chosen parameter name
  chosenParaName = modelParaOptions[chosenPara]
  
  # still need to take out true parameter values
  tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
  tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
  
  # including edges
  # test run of the plot function
  testPlotData = prepMissPlot(modelResList = modelResList,
                              modelSeList = modelSeList,
                              missModelList = missModelList,
                              propMissList = propMissList,
                              chosenProp = chunkMissProp,
                              chosenPara = chosenParaName)
  
  # brief inspection just to make sure nothing's wrong
  head(testPlotData) %>% 
    print()
  
  
  # and plot it
  missCaterpillarPlot(plotData = testPlotData,
                      truePara = tempTruePara,
                      trueSe = tempTrueSe,
                      chosenPara = chosenParaName,
                      chosenNetInd = chosenNetInd,
                      chosenProp = chunkMissProp,
                      chosenMiss = chosenMissValue) %>% 
    print()

}
##    parameter SE missModel
## 1  -5.022512 NA   marErgm
## 2 -62.259251 NA    Latent

##   parameter SE missModel
## 1  2.548622 NA   marErgm
## 2 17.492617 NA    Latent

##   parameter SE missModel
## 1 -1.027786 NA   marErgm
## 2 -2.287233 NA    Latent

# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
                            missModelList = missModelList,
                            chosenNetInd = chosenNetInd,
                            chosenProp = chunkMissProp,
                            chosenMiss = chosenMissValue)

# and bind it
failRateData = rbind(failRateData, tempFailData)

10% Todd

Literally nothing. No successful re-estimations for any missingness model for net 3.

35% Todd

# 35% missingness - Todd
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "TOdd"))

# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 2

# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 3
chunkMissPropFloat = 3.5


# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]


# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = 3

# handle fail rates separately. these data structures are very unstandardised.

# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net")

chunkMnarNames = c("20231012_missErgmNetReest_Miss")

# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")

# loop for all specified networks 
for(chunkInd in 1:length(chunkNetworks)){
  
  # make sure the re-estimation lists are empty in the beginning
  modelResList = list()
  modelSeList = list()
  missModelList = list()
  propMissList = list()
  
  # set a specific index for the network
  chosenNetInd = chunkNetworks[chunkInd]
  
  # file list for the chosen network index
  chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
  
  ## regex the MNAR files 
  chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
  
  # combine the two indices
  combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
  
  # check to make sure there aren't any duplicates
  if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
    stop("Duplicate re-estimation datafiles! BAD.")
  }
  
  # loop to load
  chosenNetOutList = outputList[combinedNetOutList]
  
  for(fileInd in 1:length(chosenNetOutList)){
    # load all files
    load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
    
    # suppress these version difference warnings. 
    suppressWarnings({
    modelResList[[fileInd]] = modelres$coefficients
    modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
    
    })
    
    # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
    
    # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
    if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
     missModelList[[fileInd]] = 4
    } else {
     missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    }
      
    # propmiss should be the same
    propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  }
  
  # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissPropFloat,
                        chosenMiss = chosenMissValue) %>% 
      print()
  
  }
   
  # a temporary data frame from the output to be binded to the big data frame
  tempFailData = prepFailPlot(propMissList = propMissList,
                              missModelList = missModelList,
                              chosenNetInd = chosenNetInd,
                              chosenProp = chunkMissProp,
                              chosenMiss = chosenMissValue)

  # and bind it
  failRateData = rbind(failRateData, tempFailData)

}
##    parameter       SE missModel
## 1 -2.2089033 1.681170     Indep
## 2 -2.7527563 1.471588    Latent
## 3  0.9039891 1.649547  mnarErgm

##   parameter        SE missModel
## 1 -1.112195 0.5174151     Indep
## 2 -1.248764 0.4822099    Latent
## 3 -1.302105 0.4873029  mnarErgm
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

##   parameter        SE missModel
## 1  2.437951 0.4559972     Indep
## 2  2.961205 0.5207720    Latent
## 3  1.119011 0.2363328  mnarErgm
## Warning: Removed 1 rows containing missing values (`geom_hline()`).

60% Todd

# 35% missingness - Todd
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "TOdd"))

# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 2

# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 6


# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]


# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = 3

# note: 
# net 3 is so messed up it has ONE converged estimation for this set 
# 20231010_missNetReest_net3_missModel1_prop1_trial9.RData

# handle fail rates separately. these data structures are very unstandardised.

# TODO: vector of file names for regex
chunkRdataNames = c("20231006_missNetReest_net")

chunkMnarNames = c("20231012_missErgmNetReest_Miss")

# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")

# loop for all specified networks 
for(chunkInd in 1:length(chunkNetworks)){
  
  # make sure the re-estimation lists are empty in the beginning
  modelResList = list()
  modelSeList = list()
  missModelList = list()
  propMissList = list()
  
  # set a specific index for the network
  chosenNetInd = chunkNetworks[chunkInd]
  
  # file list for the chosen network index
  chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
  
  ## regex the MNAR files 
  chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
  
  # combine the two indices
  combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
  
  # check to make sure there aren't any duplicates
  if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
    stop("Duplicate re-estimation datafiles! BAD.")
  }
  
  # loop to load
  chosenNetOutList = outputList[combinedNetOutList]
  
  for(fileInd in 1:length(chosenNetOutList)){
    # load all files
    load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
    
    # suppress these version difference warnings. 
    suppressWarnings({
    modelResList[[fileInd]] = modelres$coefficients
    modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
    
    })
    
    # slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
    
    # this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
    if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
     missModelList[[fileInd]] = 4
    } else {
     missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
    }
      
    # propmiss should be the same
    propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
  }
  
  # this can be a loop!
  for( chosenPara in 1:length(modelParaOptions)){
    
    # grab the chosen parameter name
    chosenParaName = modelParaOptions[chosenPara]
    
    # still need to take out true parameter values
    tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
    tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
    
    # including edges
    # test run of the plot function
    testPlotData = prepMissPlot(modelResList = modelResList,
                                modelSeList = modelSeList,
                                missModelList = missModelList,
                                propMissList = propMissList,
                                chosenProp = chunkMissProp,
                                chosenPara = chosenParaName)
    
    # brief inspection just to make sure nothing's wrong
    head(testPlotData) %>% 
      print()
    
    
    # and plot it
    missCaterpillarPlot(plotData = testPlotData,
                        truePara = tempTruePara,
                        trueSe = tempTrueSe,
                        chosenPara = chosenParaName,
                        chosenNetInd = chosenNetInd,
                        chosenProp = chunkMissProp,
                        chosenMiss = chosenMissValue) %>% 
      print()
  
  }

  # a temporary data frame from the output to be binded to the big data frame
  tempFailData = prepFailPlot(propMissList = propMissList,
                              missModelList = missModelList,
                              chosenNetInd = chosenNetInd,
                              chosenProp = chunkMissProp,
                              chosenMiss = chosenMissValue)

  # and bind it
  failRateData = rbind(failRateData, tempFailData)

}
##    parameter        SE missModel
## 1  -3.962095 0.9023328   marErgm
## 2  -1.183762 1.5346827    Latent
## 3 -14.485997        NA  mnarErgm

##    parameter        SE missModel
## 1  0.6043238 0.3158800   marErgm
## 2 -0.3741873 0.4741993    Latent
## 3 -1.6860124        NA  mnarErgm

##   parameter        SE missModel
## 1 0.1417893 0.1547958   marErgm
## 2 0.5251103 0.1820051    Latent
## 3 9.7456273        NA  mnarErgm

Failure rate plot

Wrangling

# plotting the failure rate
# data frame for the completely failed missingness combinations
# just for the formatting
tempFailData = failRateData[1,]

# todd 10s
tempFailData[1, ] = c(1, 3, 1, 1, "0")
tempFailData[2, ] = c(1, 3, 2, 1, "0")
tempFailData[3, ] = c(1, 3, 3, 1, "0")
tempFailData[4, ] = c(1, 3, 4, 1, "0")

# peter 10s
tempFailData[5, ] = c(1, 1, 1, 1, "NA")
tempFailData[6, ] = c(1, 1, 2, 1, "NA")
tempFailData[7, ] = c(1, 1, 3, 1, "NA")
tempFailData[8, ] = c(1, 3, 2, 1, "NA")
tempFailData[9, ] = c(1, 3, 3, 1, "NA")
tempFailData[10, ] = c(1, 3, 4, 1, "NA")

# todd 35s
tempFailData[11, ] = c(1, 3, 2, 3, "0")

# peter 35s
tempFailData[12, ] = c(1, 1, 1, 3, "NA")
tempFailData[13, ] = c(1, 1, 2, 3, "NA")
tempFailData[14, ] = c(1, 1, 3, 3, "NA")
tempFailData[15, ] = c(1, 3, 1, 3, "NA")
tempFailData[16, ] = c(1, 3, 4, 3, "NA")

# todd 60
tempFailData[17, ] = c(1, 3, 1, 6, "0")

# peter 60
tempFailData[18, ] = c(1, 1, 3, 6, "NA")
tempFailData[19, ] = c(1, 3, 1, 6, "NA")
tempFailData[20, ] = c(1, 3, 4, 6, "NA")

# object type stuff
tempFailData$failRate = as.numeric(tempFailData$failRate) 
tempFailData$netInd = as.numeric(tempFailData$netInd) 
tempFailData$missModel = as.numeric(tempFailData$missModel) 
tempFailData$missProp = as.numeric(tempFailData$missProp) 

# and bind
failRateData = rbind(failRateData, tempFailData)

# check for duplicates
if(any(duplicated(failRateData))){
  
  # take only distinct elements
  failRateData = distinct(failRateData)
}



# give the fail rate data (keep it untouched) a unique variable for the combination of missingness
failRatePlotData = failRateData %>% 
  group_by(missProp, missSave) %>% 
  mutate(missCondition = cur_group_id()) %>%
  ungroup()


# turn variables into factors for plotting
failRatePlotData$missModel = factor(failRatePlotData$missModel, levels = c(1,2,3,4), labels = c("Indep", "marErgm", "Latent", "mnarErgm"))
failRatePlotData$missCondition =  factor(failRatePlotData$missCondition, levels = c(1,2,3,4,5,6), labels = c("Todd10", "Peter10", "Todd35", "Peter35", "Todd60", "Peter60"))

Plotting

# subsetting per network because between-network comparisons aren't too informative
# within network comparisons where I have a specific group index for every combination of missSave and missProp (6 categories total)

selectedNets = c(1,2,3,4,5,6)

for(chosenNetInd in selectedNets){

# start with a subset of net 2
failRateSubset = failRatePlotData %>% 
  filter(netInd == chosenNetInd)

# and now we have plot-ready data
print(ggplot(failRateSubset, aes(fill = missModel, y = failRate, x = missCondition)) + 
  geom_bar(position="dodge", stat="identity") + 
  xlab("Missingness condition") + 
  ylab("Failure rate") + 
  ggtitle(paste("Failure rate - Net", chosenNetInd)) + 
  theme_classic() + 
  theme(axis.ticks.x = element_blank()) +
  ylim(0,1) )
  

}

Relative metrics

Relative bias was defined as:

\[rBias = \frac{\widetilde{\theta} - \theta}{\theta},\]

with \(\widetilde{\theta}\) representing the re-estimated parameter value and \(\theta\) representing the true parameter value.

The relative standard error is defined as: \[rSe = \frac{SE(\widetilde{\theta})}{SE(\theta)}.\]

# change some variable types
relBiasData$netInd = as.factor(relBiasData$netInd)
relSeData$netInd = as.factor(relSeData$netInd)

missPropChoices = c(1, 3, 6)
paraChoices = c("edges", "altStar", "gwesp")

# loop it

for(chosenProp in missPropChoices){
  for(chosenPara in paraChoices){
    
    # I don't even know what kind of plot this is...
    # but I need to do some wrangling and subsetting.
    # need to subset some stuff
    relBiasSub = relBiasData %>% 
      filter(propMiss == chosenProp & paraName == chosenPara)
    
    # use the 2.5% and 97.5% quantiles for the axis limits because some of them have some VERY extreme values
    tempBiasBounds = c(quantile(relBiasSub$rBias, probs = c(0.025, 0.975)))
    
    
    # so the plot would be something like
    relBiasPlot = ggplot(relBiasSub, aes(x = netInd, y = rBias, fill = missModel)) + 
                    geom_boxplot(position = position_dodge()) + 
                    facet_wrap(~missSave) + 
                    theme_classic() + 
                    geom_hline(yintercept = 0, col = "black", lty = 2) + 
                    ylim((tempBiasBounds + c(-0.5, 0.5))) +          # and add a bit of wiggle room
                    xlab("Network no.") + 
                    ylab("Relative bias value") + 
                    ggtitle(paste("Proportion", ifelse(chosenProp == 3, yes = 3.5, no = chosenProp), "- ", chosenPara)) # the ifelse is because of how I've regexed 35% missingness
    
    # print plot
    print(relBiasPlot)
    
    # and another one for variance (technically relative standard error... but yeah)
    relSeSub = relSeData %>% 
      filter(propMiss == chosenProp & paraName == chosenPara)
    
    # just the 97.5% quantile because this is strictly positive
    tempVarUpper = quantile(relSeSub$rSe, probs = c(0.975), na.rm = TRUE)
    
    # so the plot would be something like
    relSePlot = ggplot(relSeSub, aes(x = netInd, y = rSe, fill = missModel)) + 
                    geom_boxplot(position = position_dodge()) + 
                    facet_wrap(~missSave) + 
                    theme_classic() + 
                    geom_hline(yintercept = 1, col = "black", lty = 2) + 
                    ylim(0, tempVarUpper + 0.5) +         # some wiggle room
                    xlab("Network no.") + 
                    ylab("Relative variance value") + 
                    ggtitle(paste("Proportion", ifelse(chosenProp == 3, yes = 3.5, no = chosenProp), "- ", chosenPara)) # the ifelse is because of how I've regexed 35% missingness
    
    # print plot
    print(relSePlot)
  
  }
}
## Warning: Removed 40 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 36 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 5 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 47 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 25 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 51 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 26 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 19 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 38 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 44 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 35 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 50 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 45 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 23 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 76 rows containing non-finite values (`stat_boxplot()`).

# a repeat without the imposed axes just for comparison
for(chosenProp in missPropChoices){
  for(chosenPara in paraChoices){
    
    # I don't even know what kind of plot this is...
    # but I need to do some wrangling and subsetting.
    # need to subset some stuff
    relBiasSub = relBiasData %>% 
      filter(propMiss == chosenProp & paraName == chosenPara)
    
    # so the plot would be something like
    relBiasPlot = ggplot(relBiasSub, aes(x = netInd, y = rBias, fill = missModel)) + 
                    geom_boxplot(position = position_dodge()) + 
                    facet_wrap(~missSave) + 
                    theme_classic() + 
                    geom_hline(yintercept = 0, col = "black", lty = 2) + 
                    xlab("Network no.") + 
                    ylab("Relative bias value") + 
                    ggtitle(paste("Proportion", ifelse(chosenProp == 3, yes = 3.5, no = chosenProp), "- ", chosenPara)) # the ifelse is because of how I've regexed 35% missingness
    
    # print plot
    print(relBiasPlot)
    
    # and another one for variance (technically relative standard error... but yeah)
    relSeSub = relSeData %>% 
      filter(propMiss == chosenProp & paraName == chosenPara)
    
    # so the plot would be something like
    relSePlot = ggplot(relSeSub, aes(x = netInd, y = rSe, fill = missModel)) + 
                    geom_boxplot(position = position_dodge()) + 
                    facet_wrap(~missSave) + 
                    theme_classic() + 
                    geom_hline(yintercept = 1, col = "black", lty = 2) + 
                    xlab("Network no.") + 
                    ylab("Relative variance value") + 
                    ggtitle(paste("Proportion", ifelse(chosenProp == 3, yes = 3.5, no = chosenProp), "- ", chosenPara)) # the ifelse is because of how I've regexed 35% missingness
    
    # print plot
    print(relSePlot)
  
  }
}

## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 6 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 9 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 17 rows containing non-finite values (`stat_boxplot()`).

## Warning: Removed 49 rows containing non-finite values (`stat_boxplot()`).